###############################################################################
## Replication of Main Analyses and Results
## Schub, "Informing the Leader: Bureaucracies and International Crises", APSR
###############################################################################

## I.  Bureaucracy-Crisis as Unit of Analysis
 ## Manuscript Tables 1-4
 ## Manuscript Figures 3-4
 ## Supporting Information: S5.1-S5.2 Robustness -- Content
 	## Tables A5-A8
 	## Randomization Inference and Bayes Factor
 	## Figure A2
 ## Supporting Information: S5.3 Robustness -- Uncertainty
 	## Tables A12-A15
 	## Randomization Inference and Bayes Factor
 	## Figures A3-A4

## II. Individual Speaker-Crisis as Unit of Analysis
 ## Supporting Information: S5.2 Robustness -- Content
    ## Tables A9-A11
 ## Supporting Information: S5.3 Robustness -- Uncertainty
    ## Tables A16-A17
  
## III. Corpus Characteristics 
 ## Supporting Information: S2
  	## Corpus descriptives
  	## Influential speakers 
  	
## IV.  Internal Bureaucracy Communication
 ## Supporting Information: S6  	
  		

##################################
#### Load Libraries
##################################
library(stargazer)
library(ggplot2)
library(BayesFactor)
library(xtable)
library(plyr)

######################################################################################################
######################################################################################################
#### I. Bureaucracy-Crisis as Unit of Analysis
######################################################################################################
######################################################################################################

## set the appropriate working directory and load data
setwd("")
data<-read.csv("BureaucracyLevel_Polished.csv")

##################################
#### Manuscript Tables 1-4
##################################

################
## Table 1: Bureaucratic Role and Advisory Content
  # Continuous outcome measure
cc1<-lm(fnbstd ~ state, data=data) #bivariate
cc2<-lm(fnbstd ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data) #controls
cc3<-lm(fnbstd ~ state + factor(obs), data=data) #FEs
cc4<-lm(fnbstd ~ state + factor(obs), data=data[data$role=="state"|data$role=="defense"|data$role=="jcs",]) #FEs and subset to ideal types

 # Binary outcome measure
cb1<-lm(fnbpolitical ~ state, data=data) #bivariate
cb2<-lm(fnbpolitical ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data) #controls
cb3<-lm(fnbpolitical ~ state + factor(obs), data=data) #FEs
cb4<-lm(fnbpolitical ~ state + factor(obs), data=data[data$role=="state"|data$role=="defense"|data$role=="jcs",]) #FEs and subset to ideal types

stargazer(cc1,cc2,cc3,cc4,cb1,cb2,cb3,cb4,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

 #outcome means
mean(data$fnbstd) #models 1-3
mean(data$fnbstd[data$role=="state"|data$role=="defense"|data$role=="jcs"]) #model 4
mean(data$fnbpolitical) #models 5-7
mean(data$fnbpolitical[data$role=="state"|data$role=="defense"|data$role=="jcs"]) #model 8


################
## Table 2: Bureaucratic Role and Advisory Uncertainty
u1<-lm(uper.bcracy100 ~ state, data=data) #bivariate
u2<-lm(uper.bcracy100 ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data) #controls
u3<-lm(uper.bcracy100 ~ state + factor(obs), data=data) #FEs
u4<-lm(uper.bcracy100 ~ state + factor(obs), data=data[data$role=="state"|data$role=="defense"|data$role=="jcs",]) #FEs and subset to ideal types
u5<-lm(uper.bcracy100 ~ state + factor(obs), data=data[data$competent2==1,]) #FEs and subset to ideal types discussing areas of expertise

stargazer(u1,u2,u3,u4,u5,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

 #outcome means
mean(data$uper.bcracy100) #models 1-3
mean(data$uper.bcracy100[data$role=="state"|data$role=="defense"|data$role=="jcs"]) #model 4
mean(data$uper.bcracy100[data$competent2==1],na.rm=TRUE) #model 5


################
## Table 3 (full results in Table A18): Sources of Uncertainty Differences Across Bureaucracies (provides full results beyond what is reported in the manuscript or SI)
im1<-lm(uper.bcracy100 ~ fnbstd + relcap + polity + lnhost.dist + nonstate.enemy + republican, data) #crisis controls
im2<-lm(uper.bcracy100 ~ fnbstd + factor(role) + relcap + polity + lnhost.dist + nonstate.enemy + republican, data) #crisis controls + bureaucracy FEs
im3<-lm(uper.bcracy100 ~ fnbstd + state + fnbstd:state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data) #crisis controls

stargazer(im1,im2,im3,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

 #outcome mean
mean(data$uper.bcracy100) #models 1-3


################
## Table 4: Counts of Relative Aggressiveness by Bureaucracy 
  # Raw counts (based on simple tally of codings in SI S1 where 0=defense more aggressive; 1=roughly similar; 2=state more aggressive)
obs<-c(1,1,0,1,2,2,0,1,1,2,1,0,0,1,1,1,0,1,1,0,0,0,1,2,2,0,0,1,1,1,1,1,0,1,2,0,2,1,1,2,0,1,1,2,0,1,0,2,2,2,0,1,2,2,2,1,0,1,1,1,0)
xt<-matrix(table(obs),nrow=1)
xtable(xt)
  # t-test
t.test(obs,mu=1) 


##################################
#### Manuscript Figures 3-4
##################################

################
## Figure 3: Left Panel
lab<-c("Defense","Other","State") # three roles
data$role2num<-as.numeric(as.factor(data$role2))
polc<-c(mean(data$fnbstd[data$role2=="state"]),mean(data$fnbstd[data$role2=="other"]),mean(data$fnbstd[data$role2=="defense"])) # mean for each role
fig3left<-ggplot(data, aes(y=as.numeric(role2num),x=fnbstd)) + geom_point(shape=1,size=2) + theme_bw(base_size=18) + xlab("Political Content Score \n") + scale_y_continuous(name="", labels=lab, breaks=c(1,2,3), limits=c(0.5,3.5)) + ggtitle("Content by Bureaucracy \n") + theme(plot.title=element_text(hjust=0.5)) + ggplot2::annotate("point",x=c(rev(polc)),y=c(1:3),col="black",shape=18,size=7)

fig3left 

################
## Figure 3: Right Panel
b<-c(summary(cc1)$coefficients[2,1],summary(cc2)$coefficients[2,1], summary(cc3)$coefficients[2,1], summary(cc4)$coefficients[2,1]) #coefficients
se<-c(summary(cc1)$coefficients[2,2], summary(cc2)$coefficients[2,2],summary(cc3)$coefficients[2,2], summary(cc4)$coefficients[2,2]) #SEs
blow<-b-1.96*se
bhigh<-b+1.96*se
blow10<-b-1.69*se
bhigh10<-b+1.69*se
count<-c(1:4)
mes<-as.data.frame(cbind(b,se,blow,bhigh,blow10,bhigh10))
mes$x<-count
corelab<-c("Bivariate","Case Covariates","Case Fixed Effects","Case Fixed Effects \n(vs. Defense only)")
fig3right<-ggplot(mes,aes(x=b,y=x)) +  theme_bw() + geom_vline(xintercept=0, linetype=2) + xlab("Effect of State Deptartment\non Political Content Score")   +  theme_bw(base_size=18) + scale_y_continuous(name="",labels=corelab,breaks=c(1,2,3,4), limits=c(0.8,4.2)) + ggtitle("Marginal Effect of \n Bureaucratic Role") + theme(plot.title=element_text(hjust=0.5))+scale_x_continuous(limits=c(0,0.3)) + geom_segment(aes(x=blow,y=x,xend=bhigh,yend=x),size=1.1) + geom_segment(aes(x=blow10,y=x,xend=bhigh10,yend=x),size=1.7) + geom_point(,size=3)

fig3right



################
## Figure 4: Left Panel
  # state
s<-mean(data$uper.bcracy100[data$role2=="state"],na.rm=TRUE)
sp<-mean(data$uper.bcracy100[data$role2=="state" & data$fnbpolitical==1],na.rm=TRUE)
sm<-mean(data$uper.bcracy100[data$role2=="state" & data$fnbpolitical==0],na.rm=TRUE)
  # pentagon
p<-mean(data$uper.bcracy100[data$role2=="defense"],na.rm=TRUE)
pp<-mean(data$uper.bcracy100[data$role2=="defense" & data$fnbpolitical==1],na.rm=TRUE)
pm<-mean(data$uper.bcracy100[data$role2=="defense" & data$fnbpolitical==0],na.rm=TRUE)
  # other
m<-mean(data$uper.bcracy100[data$role2=="other"],na.rm=TRUE)
mp<-mean(data$uper.bcracy100[data$role2=="other" & data$fnbpolitical==1],na.rm=TRUE)
mm<-mean(data$uper.bcracy100[data$role2=="other" & data$fnbpolitical==0],na.rm=TRUE)

names<-c("State","Other","Defense")
all<-c(s,m,p)
pols<-c(sp,mp,pp)
mils<-c(sm,mm,pm)
values<-as.numeric(c(all,pols,mils))
groupraw<-c("State","Other","Defense")
group<-rep(groupraw,3)
subgroup<-c(rep("All",3),rep("Political",3),rep("Military",3))
summdf<-as.data.frame(cbind(values,group,subgroup))
summdf$values<-as.numeric(levels(summdf$values))[summdf$values]
summdf$group<-factor(summdf$group, levels=c("State","Other","Defense"))
summdf$group2<-factor(summdf$group, levels=c("Defense","Other","State"))
summdf$subgroup<-factor(summdf$subgroup, levels=c("All","Political","Military"))
summdf$subgroup2<-factor(summdf$subgroup, levels=c("Military","Political","All"))
see<-0.3
transp<-c(1,1,1,see,see,see,see,see,see)
summdf$transpdf<-as.factor(transp)
transp2<-c(1,see,see)
summdf2<-rbind(summdf[7:9,],summdf[4:6,],summdf[1:3,])
fig4left<-ggplot() + geom_bar(data=summdf2, aes(fill=subgroup2, y=values, x=group2, alpha=transpdf), width=0.7,position="dodge", stat="identity")  + scale_alpha_manual(values=c("0.3"=0.3, "1"=1), guide='none') + scale_fill_manual("legend", values = c("Political" = "grey70", "Military" = "grey40","All" = "black")) + theme_bw(base_size=18) + xlab("") + ylab("Uncertainty Score (average)\n") + theme(legend.position="left") + ggtitle("Uncertainty by Content\nand Bureaucracy") + theme(plot.title=element_text(hjust=0.5))    + guides(fill = guide_legend(title="Content", reverse=TRUE, override.aes = list(alpha = c(1,0.4,0.4)))) + coord_flip(ylim=c(4,6))

fig4left


################
## Figure 4: Right Panel
b<-c(summary(u1)$coefficients[2,1],summary(u2)$coefficients[2,1], summary(u3)$coefficients[2,1], summary(u4)$coefficients[2,1], summary(u5)$coefficients[2,1])
se<-c(summary(u1)$coefficients[2,2],summary(u2)$coefficients[2,2], summary(u3)$coefficients[2,2], summary(u4)$coefficients[2,2], summary(u5)$coefficients[2,2])
blow<-b-1.96*se
bhigh<-b+1.96*se
blow10<-b-1.69*se
bhigh10<-b+1.69*se
count<-c(1:5)
mes<-as.data.frame(cbind(b,se,blow,bhigh,blow10,bhigh10))
mes$x<-count
corelab<-c("Bivariate","Case Covariates","Case Fixed Effects","Case Fixed Effects\n(vs. Defense only)","Case Fixed Effects\n(State political content vs.\n Defense military content)" )
fig4right<- ggplot(mes,aes(x=b,y=x)) +  theme_bw() + geom_vline(xintercept=0, linetype=2) + xlab("Effect of State Deptartment\non Uncertainty")   +  theme_bw(base_size=18) + scale_y_continuous(name="",labels=corelab,breaks=c(1,2,3,4,5), limits=c(0.8,5.2)) + ggtitle("Marginal Effect of \n Bureaucratic Role") + theme(plot.title=element_text(hjust=0.5))+scale_x_continuous(limits=c(0,2.9)) + geom_segment(aes(x=blow,y=x,xend=bhigh,yend=x),size=1.1) + geom_segment(aes(x=blow10,y=x,xend=bhigh10,yend=x),size=1.7) + geom_point(,size=3)

fig4right




####################################################################
#### Supporting Information: S5.1-S5.2 Robustness -- Content
####################################################################

################
## Table A5: Summary Statistics
vars<-c("fnbstd","fnbpolitical","uper.bcracy100","state","rawtotwords","relcap","polity","lnhost.dist","nonstate.enemy","republican")
output<-data[c(vars)]
stargazer(output,digits=2,omit.summary.stat=c("p25","p75"),covariate.labels=c("Political Content Score","Political","Uncertainty","State Department","Total Words","Relative Capabilities","Regime Type","Distance (log)","Non-state Enemy","Republic Admin."))

################
## Table A6: Logistic regression for binary outcome variable
cbb1<-glm(fnbpolitical ~ state, data=data, family="binomial")
cbb2<-glm(fnbpolitical ~ state + factor(obs), data=data, family="binomial")
cbb3<-glm(fnbpolitical ~ state + factor(obs), data=data[data$role=="state"|data$role=="defense"|data$role=="jcs",], family="binomial")
stargazer(cbb1,cbb2,cbb3, align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

################
## Table A7: Crisis-level control variables (manuscript table 1 models 2 and 6)
cc2<-lm(fnbstd ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data) #controls
cb2<-lm(fnbpolitical ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data) #controls
stargazer(cc2,cb2,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

################
## Table A8: Exclude potentially distinct observations
  # identify nuclear program case
data$nukes<-ifelse(data$obs=="SovietNuclear",1,0)
  # identify vietnam war repeats
data$namrepeats<-ifelse((data$obs=="Pleiku"|data$obs=="CambodiaInvasion"|data$obs=="ChristmasBombing"|data$obs=="Pleiku"|data$obs=="PortsMining"|data$obs=="Tet"),1,0) #keep first major decision of each admin (Tonkin for Johnson; Cambodia bombing for Nixon; Saigon Fall for Ford)

cdn1<-lm(fnbstd ~ state, data=data[data$nukes!=1,])
cdn2<-lm(fnbstd ~ state + factor(obs), data=data[data$nukes!=1,])
cdv1<-lm(fnbstd ~ state, data=data[data$namrepeats!=1,])
cdv2<-lm(fnbstd ~ state + factor(obs), data=data[data$namrepeats!=1,])
stargazer(cdn1,cdn2,cdv1,cdv2, align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

################
## Randomization inference (SI S5.2)
  # uses ideal type bureaucracies 
   # define the real test statistic as difference in means between state and jcs/non-jcs defense
realts<-mean(data$fnbstd[data$role=="state"])-mean(data$fnbstd[data$role=="defense"|data$role=="jcs"])
subcontent<-data$fnbstd[data$role=="state"|data$role=="defense"|data$role=="jcs"] #values that go into the test statistic
statepct<-length(data$fnbstd[data$role=="state"])/length(subcontent) #percentage of observations that should receive "state"=1 in each permutation

set.seed(02138)
n<-10000
matt<-matrix(NA,nrow=n,ncol=4)
for(i in 1:n){
	rand<-rbinom(length(subcontent),1,statepct)
	newdf<-as.data.frame(cbind(subcontent,rand))
	state<-mean(newdf$subcontent[newdf$rand==1])
	mil<-mean(newdf$subcontent[newdf$rand==0])
	out<-state-mil
	matt[i,1]<-i
	matt[i,2]<-state
	matt[i,3]<-mil
	matt[i,4]<-out
}

matt<-as.data.frame(matt)
colnames(matt)<-c("run","statemean","milmean","ts")
matt$absts<-abs(matt$ts)
matt$extreme<-ifelse(matt$absts>abs(realts),1,0)
mean(matt$extreme) #0 instances of a test stat as extreme
realts #observed test statistic=0.19
max(matt$absts) #maximum permutation test statistic=0.18


################
## Bayes factor (SI S5.2)
  # Include State + Covariates vs. only Covariates
bfcovstate<-lmBF(fnbstd ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data)
bfcovonly<-lmBF(fnbstd ~ relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data)
bfcovstate/bfcovonly #BF=116
  # Include State + Covariates vs. only Covariates | Ideal Types
bfcovstated<-lmBF(fnbstd ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data[data$role2!="other",])
bfcovonlyd<-lmBF(fnbstd ~ relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data[data$role2!="other",])
bfcovstated/bfcovonlyd #BF=2883


################
## Figure A2: Content by Bureaucracy (disaggregating into all five bureaucracies)
lab52<-c("JCS","Defense","CIA","NSC/WH","State")
data$rolenum<-NA
data$rolenum[data$role=="state"]<-5
data$rolenum[data$role=="executive"]<-4
data$rolenum[data$role=="cia"]<-3
data$rolenum[data$role=="defense"]<-2
data$rolenum[data$role=="jcs"]<-1

polc5<-c(mean(data$fnbstd[data$role=="state"]),mean(data$fnbstd[data$role=="executive"]),mean(data$fnbstd[data$role=="cia"]),mean(data$fnbstd[data$role=="defense"]),mean(data$fnbstd[data$role=="jcs"]))
figA2<-ggplot(data, aes(y=as.numeric(rolenum),x=fnbstd)) + geom_point(shape=1,size=2) + theme_bw(base_size=18) + xlab("Political Content Score \n") + scale_y_continuous(name="", labels=lab52, breaks=c(1,2,3,4,5), limits=c(0.5,5.5)) + ggtitle("Content by Bureaucracy \n") + theme(plot.title=element_text(hjust=0.5)) + ggplot2::annotate("point",x=c(rev(polc5)),y=c(1:5),col="black",shape=18,size=7)

figA2


####################################################################
#### Supporting Information: S5.3 Robustness -- Uncertainty
####################################################################

################
## Table A12: Control variables (full results for manuscript table 2 model 2)
u2<-lm(uper.bcracy100 ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data) 
stargazer(u2,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

################
## Table A13: Alternative uncertainty dictionary (Loughran and McDonald 2011)
au1<-lm(lper.bcracy100 ~ state, data=data)
au2<-lm(lper.bcracy100 ~ state + factor(obs), data=data)
au3<-lm(lper.bcracy100 ~ state + factor(obs), data=data[data$role=="state"|data$role=="defense"|data$role=="jcs",])
au4<-lm(lper.bcracy100 ~ state + factor(obs), data=data[data$competent2==1,])
stargazer(au1,au2,au3,au4,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

  # correlation between uncertainty dictionaries
cor(data$uper.bcracy100,data$lper.bcracy100)


################
## Table A14: Placebo dictionary ("Religous" dictionary)
pu1<-lm(rper.bcracy100 ~ state, data=data)
pu2<-lm(rper.bcracy100 ~ state + factor(obs), data=data)
pu3<-lm(rper.bcracy100 ~ state + factor(obs), data=data[data$role=="state"|data$role=="defense"|data$role=="jcs",])
pu4<-lm(rper.bcracy100 ~ state + factor(obs), data=data[data$competent2==1,])
stargazer(pu1,pu2,pu3,pu4,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

################
## Table A15: Exclude potentially distinct observations (nuclear cases and repeat vietnam war cases)
udn1<-lm(uper.bcracy100 ~ state, data=data[data$nukes!=1,])
udn2<-lm(uper.bcracy100 ~ state + factor(obs), data=data[data$nukes!=1,])
udv1<-lm(uper.bcracy100 ~ state, data=data[data$namrepeats!=1,])
udv2<-lm(uper.bcracy100 ~ state + factor(obs), data=data[data$namrepeats!=1,])
stargazer(udn1,udn2,udv1,udv2, align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)

################
## Randomization inference (SI S5.3)
  # uses ideal type bureaucracies 
   # define the real test statistic as difference in means between state and jcs/non-jcs defense
realts<-mean(data$uper.bcracy100[data$role=="state"])-mean(data$uper.bcracy100[data$role=="defense"|data$role=="jcs"])
subcontent<-data$uper.bcracy100[data$role=="state"|data$role=="defense"|data$role=="jcs"] #values that go into the test statistic
statepct<-length(data$uper.bcracy100[data$role=="state"])/length(subcontent) #percentage of observations that should receive "state"=1 in each permutation

set.seed(02138)
n<-10000
mattu<-matrix(NA,nrow=n,ncol=4)
for(i in 1:n){
	rand<-rbinom(length(subcontent),1,statepct)
	newdf<-as.data.frame(cbind(subcontent,rand))
	state<-mean(newdf$subcontent[newdf$rand==1])
	mil<-mean(newdf$subcontent[newdf$rand==0])
	out<-state-mil
	mattu[i,1]<-i
	mattu[i,2]<-state
	mattu[i,3]<-mil
	mattu[i,4]<-out
}

mattu<-as.data.frame(mattu)
colnames(mattu)<-c("run","statemean","milmean","ts")
summary(mattu$ts)
mattu$absts<-abs(mattu$ts)
mattu$extreme<-ifelse(mattu$absts>abs(realts),1,0)
mean(mattu$extreme) #3% more extreme than the observed test statistic

################
## Bayes factor (SI S5.3)
  # Include State + Covariates vs. only Covariates
bfcovstate<-lmBF(uper.bcracy100 ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data)
bfcovonly<-lmBF(uper.bcracy100 ~ relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data)
bfcovstate/bfcovonly #BF=2.6
  # Include State + Covariates vs. only Covariates | Ideal Types
bfcovstated<-lmBF(uper.bcracy100 ~ state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data[data$role2!="other",])
bfcovonlyd<-lmBF(uper.bcracy100 ~ relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data[data$role2!="other",])
bfcovstated/bfcovonlyd #BF=2.9

################
## Figure A3: Uncertainty by Bureaucracy (disaggregating into all five bureaucracies)
  # state
s<-mean(data$uper.bcracy100[data$role=="state"],na.rm=TRUE)
sp<-mean(data$uper.bcracy100[data$role=="state" & data$fnbpolitical==1],na.rm=TRUE)
sm<-mean(data$uper.bcracy100[data$role=="state" & data$fnbpolitical==0],na.rm=TRUE)
  # defense (non-jcs)
d<-mean(data$uper.bcracy100[data$role=="defense"],na.rm=TRUE)
dp<-mean(data$uper.bcracy100[data$role=="defense" & data$fnbpolitical==1],na.rm=TRUE)
dm<-mean(data$uper.bcracy100[data$role=="defense" & data$fnbpolitical==0],na.rm=TRUE)
  # jcs
j<-mean(data$uper.bcracy100[data$role=="jcs"],na.rm=TRUE)
jp<-mean(data$uper.bcracy100[data$role=="jcs" & data$fnbpolitical==1],na.rm=TRUE)
jm<-mean(data$uper.bcracy100[data$role=="jcs" & data$fnbpolitical==0],na.rm=TRUE)
  # cia
c<-mean(data$uper.bcracy100[data$role=="cia"],na.rm=TRUE)
cp<-mean(data$uper.bcracy100[data$role=="cia" & data$fnbpolitical==1],na.rm=TRUE)
cm<-mean(data$uper.bcracy100[data$role=="cia" & data$fnbpolitical==0],na.rm=TRUE)
  # exec
e<-mean(data$uper.bcracy100[data$role=="executive"],na.rm=TRUE)
ep<-mean(data$uper.bcracy100[data$role=="executive" & data$fnbpolitical==1],na.rm=TRUE)
em<-mean(data$uper.bcracy100[data$role=="executive" & data$fnbpolitical==0],na.rm=TRUE)
names<-c("State","NSC/WH","CIA","Defense","JCS")
all<-c(s,e,c,d,j)
pols<-c(sp,ep,cp,dp,jp)
mils<-c(sm,em,cm,dm,jm)
values<-as.numeric(c(all,pols,mils))
groupraw<-c("State","NSC/WH","CIA","Defense","JCS")
group<-rep(groupraw,3)
subgroup<-c(rep("All",5),rep("Political",5),rep("Military",5))
summdf<-as.data.frame(cbind(values,group,subgroup))
summdf$values<-as.numeric(levels(summdf$values))[summdf$values]
summdf$group<-factor(summdf$group, levels=c("State","NSC/WH","CIA","Defense","JCS"))
summdf$group2<-factor(summdf$group, levels=c("JCS","Defense","CIA","NSC/WH","State"))
summdf$subgroup<-factor(summdf$subgroup, levels=c("All","Political","Military"))
summdf$subgroup2<-factor(summdf$subgroup, levels=c("Military","Political","All"))
see<-0.3
transp<-c(1,1,1,1,1,see,see,see,see,see,see,see,see,see,see)
summdf$transpdf<-as.factor(transp)
transp2<-c(1,see,see)
summdf2<-rbind(summdf[11:15,],summdf[6:10,],summdf[1:5,])

figA3<-ggplot() + geom_bar(data=summdf2, aes(fill=subgroup2, y=values, x=group2, alpha=transpdf), width=0.7,position="dodge", stat="identity")  + scale_alpha_manual(values=c("0.3"=0.3, "1"=1), guide='none') + scale_fill_manual("legend", values = c("Political" = "grey70", "Military" = "grey40","All" = "black")) + theme_bw(base_size=18) + xlab("") + ylab("Uncertainty Score (average)\n") + theme(legend.position="left") + ggtitle("Uncertainty by Content\nand Bureaucracy") + theme(plot.title=element_text(hjust=0.5))    + guides(fill = guide_legend(title="Content", reverse=TRUE, override.aes = list(alpha = c(1,0.4,0.4)))) + coord_flip(ylim=c(4,6))
figA3 #*Note: JCS has 0 observations coded as political content


################
## Figure A4: Decomposing Uncertainty Differences
  # Manuscript Table 3 Model 3 (motivates the analysis and figure)
im3<-lm(uper.bcracy100 ~ fnbstd + state + fnbstd:state + relcap + polity + lnhost.dist + nonstate.enemy + republican, data=data) #crisis controls 

  # Calculating total gap to be accounted for
    # Predicted value for state given state's mean political content (strip out intercept and controls)
sreal<-mean(data$fnbstd[data$state==1])*(summary(im3)$coefficients[9,1]+summary(im3)$coefficients[2,1])+summary(im3)$coefficients[3,1]
    # Predicted value for non-state given non-state's mean political content (strip out intercept and controls)
nsreal<-mean(data$fnbstd[data$state==0])*(summary(im3)$coefficients[2,1])
sreal-nsreal # total gap=0.62

   # Calculating gap due to disposition/cultural mechanism [take the gap in predicted values at some point that is reasonable: here i use the minimum political content score of a state observation]
basegap<-min(data$fnbstd[data$state==1])*summary(im3)$coefficients[9,1]+summary(im3)$coefficients[3,1]
basegap # dispositional gap=0.19

   # Calculating gap due to information-composition
margpol<-mean(data$fnbstd[data$state==1])-mean(data$fnbstd[data$state==0]) # avg difference in content
margpolcontuper<-margpol*summary(im3)$coefficients[2,1] # how much more uncertain would non-state officials be if they evaluated as much political content as state does
margpolcontuper # information-composition gap=0.20

   # Calculating gap due to information-expertise
pollevels<-seq(0,1,by=0.01) #to find values across the full range
reps<-length(pollevels)
newdat<-data.frame(state=c(rep(0,reps),rep(1,reps)), fnbstd=rep(pollevels,2), relcap=rep(mean(data$relcap),reps*2) , polity=rep(mean(data$polity),reps*2) , lnhost.dist=rep(mean(data$lnhost.dist),reps*2) , nonstate.enemy =rep(mean(data$nonstate.enemy),reps*2) , republican=rep(mean(data$republican),reps*2))
pred<-as.data.frame(predict(im3, newdata=newdat, interval="confidence"))  
pred$state<-newdat$state
pred$pol<-newdat$fnbstd 
expertise<-(sreal+pred$fit[1]-basegap)-(nsreal+pred$fit[1]+margpolcontuper) # backs out the remaining gap
expertise # information-expertise gap=0.23

   # Figure A4
plot(pred$pol[pred$state==0 & pred$pol>min(data$fnbstd[data$state==0]) & pred$pol<max(data$fnbstd[data$state==0])],pred$fit[pred$state==0 & pred$pol>min(data$fnbstd[data$state==0]) & pred$pol<max(data$fnbstd[data$state==0])],type="l",ylim=c(4.2,6.3),lwd=2,xlab="",ylab="",col="firebrick")
points(pred$pol[pred$state==1 & pred$pol>min(data$fnbstd[data$state==1]) & pred$pol<max(data$fnbstd[data$state==1])],pred$fit[pred$state==1 & pred$pol>min(data$fnbstd[data$state==1]) & pred$pol<max(data$fnbstd[data$state==1])],type="l",lwd=2,col="dodgerblue3")
points(mean(data$fnbstd[data$state==1]),sreal+pred$fit[1],pch=19,cex=1.4,col="dodgerblue3")
points(mean(data$fnbstd[data$state==0]),nsreal+pred$fit[1],pch=19,cex=1.4,col="firebrick")
points(mean(data$fnbstd[data$state==1]),nsreal+pred$fit[1]+margpolcontuper,pch=2,cex=1.4)
points(pred$pol[pred$state==1 & pred$pol>min(data$fnbstd[data$state==1]) & pred$pol<max(data$fnbstd[data$state==1])],pred$fit[pred$state==1 & pred$pol>min(data$fnbstd[data$state==1]) & pred$pol<max(data$fnbstd[data$state==1])]-basegap,type="l",lty=3)
segments(min(data$fnbstd[data$state==1]),0,min(data$fnbstd[data$state==1]),min(pred$fit[pred$state==1 & pred$pol>min(data$fnbstd[data$state==1]) & pred$pol<max(data$fnbstd[data$state==1])])-basegap,type="l",lty=3)
points(mean(data$fnbstd[data$state==1]),nsreal+pred$fit[1],pch=1,cex=1.4,col="firebrick")
points(mean(data$fnbstd[data$state==1]),sreal+pred$fit[1]-basegap,pch=5,cex=1.4)
text(0.65,5.12,"1",cex=1.1)
text(0.65,5.35,"3",cex=1.1)
text(0.65,5.56,"2",cex=1.1)
text(min(data$fnbstd[data$state==1]),4.7,"2",cex=1.1)
rug(data$fnbstd[data$state==1],lwd=1)
segments(mean(data$fnbstd[data$state==1]),0,mean(data$fnbstd[data$state==1]),sreal+pred$fit[1],lty=2,col="dodgerblue3")
segments(mean(data$fnbstd[data$state==0]),0,mean(data$fnbstd[data$state==0]),nsreal+pred$fit[1],lty=2,col="firebrick")
text(0.74,6.03,"State",cex=1.2,col="dodgerblue3")
text(0.84,5.35,"Non-State",cex=1.2,col="firebrick")
arrows(mean(data$fnbstd[data$state==0]),nsreal+pred$fit[1],mean(data$fnbstd[data$state==1])-0.01,nsreal+pred$fit[1],length=0.1,col="firebrick")
mtext("Political Content Score",1,outer=FALSE,line=2.5,cex=1.5)
mtext("Predicted Uncertainty",2,outer=FALSE,line=2.5,cex=1.5)
mtext("Decomposing Uncertainty Differences",3,outer=FALSE,line=1.2,cex=1.7)
text(0.22,6.3,"1. Information-composition",cex=1.2)
text(0.12,6.2,"2. Disposition",cex=1.2)
text(0.20,6.1,"3. Information-expertise",cex=1.2)



######################################################################################################
######################################################################################################
#### II. Individual Speaker-Crisis as Unit of Analysis
######################################################################################################
######################################################################################################

## set the appropriate working directory and load data
dat<-read.csv("SpeakerLevel_Polished.csv") 

  ## Narrow to core roles
keep<-dat[dat$role=="cia"|dat$role=="state"|dat$role=="defense"|dat$role=="executive"|dat$role=="jcs",]
  ## Set wordcount threshold
n<-30  #30 as baseline
key<-keep[keep$totwords>=n,]


############################################################################
#### Supporting Information: S5.2 Robustness -- Content at Individual Level
############################################################################

################
## Table A9: Individual speaker and content
  # Continuous
cm1<-lm(fnbstd ~ state, key)
cm2<-lm(fnbstd ~ state + factor(obs), key)
cm3<-lm(fnbstd ~ state + factor(obs), key[key$role=="state"|key$role=="defense"|key$role=="jcs",])
  # Binary
cb1<-lm(fnbpolitical ~ state, key)
cb2<-lm(fnbpolitical ~ state + factor(obs), key)
cb3<-lm(fnbpolitical ~ state + factor(obs), key[key$role=="state"|key$role=="defense"|key$role=="jcs",])
stargazer(cm1,cm2,cm3,cb1,cb2,cb3,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)


################
## Table A10: Individual speaker and content excluding leader names from classifier 
  # Continuous
cm1nn<-lm(fnbstd_nonames ~ state, key)
cm2nn<-lm(fnbstd_nonames ~ state + factor(obs), key)
cm3nn<-lm(fnbstd_nonames ~ state + factor(obs), key[key$role=="state"|key$role=="defense"|key$role=="jcs",])
  # Binary
cb1nn<-lm(fnbpolitical_nonames ~ state, key)
cb2nn<-lm(fnbpolitical_nonames ~ state + factor(obs), key)
cb3nn<-lm(fnbpolitical_nonames ~ state + factor(obs), key[key$role=="state"|key$role=="defense"|key$role=="jcs",])

stargazer(cm1nn,cm2nn,cm3nn,cb1nn,cb2nn,cb3nn,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)



################
## Table A11: Individual speaker and content controlling for document type
  # Continuous
cm1m<-lm(fnbstd ~ state + memo, key)
cm2m<-lm(fnbstd ~ state + memo + factor(obs), key)
cm3m<-lm(fnbstd ~ state + memo + factor(obs), key[key$role=="state"|key$role=="defense"|key$role=="jcs",])
  # Binary
cb1m<-lm(fnbpolitical ~ state + memo, key)
cb2m<-lm(fnbpolitical ~ state + memo + factor(obs), key)
cb3m<-lm(fnbpolitical ~ state + memo + factor(obs), key[key$role=="state"|key$role=="defense"|key$role=="jcs",])
stargazer(cm1m,cm2m,cm3m,cb1m,cb2m,cb3m,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)


############################################################################
#### Supporting Information: S5.3 Robustness -- Uncertainty at Individual Level
############################################################################

################
## Table A16: Individual speaker and uncertainty 
u1<-lm(uper100 ~ state, key)
u2<-lm(uper100 ~ state + factor(obs), key)
u3<-lm(uper100 ~ state + factor(obs), key[key$role=="state"|key$role=="defense"|key$role=="jcs",])
u4<-lm(uper100 ~ state + factor(obs), key[key$competent2==1 & !is.na(key$competent2),])
stargazer(u1,u2,u3,u4,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)


################
## Table A17: Individual speaker and uncertainty controlling for document type
u1m<-lm(uper100 ~ state + memo, key)
u2m<-lm(uper100 ~ state + memo + factor(obs), key)
u3m<-lm(uper100 ~ state + memo + factor(obs), key[key$role=="state"|key$role=="defense"|key$role=="jcs",])
u4m<-lm(uper100 ~ state + memo + factor(obs), key[key$competent2==1 & !is.na(key$competent2),])
stargazer(u1m,u2m,u3m,u4m,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)




######################################################################################################
######################################################################################################
#### III. Corpus Characteristics 
######################################################################################################
######################################################################################################

################
## Corpus Descriptive Statistics
  # speakers, documents, and speech acts
length(unique(dat$name))  #unique speakers = 176
length(unique(dat$docid)) #number of unique documents = 382
sum(dat$times.speaker)  #unique speech acts (uninterrupted utterance or written segment) = 5404

  # memos vs. meetings
length(dat$memo[dat$memo==1])/sum(dat$times.speaker) #percentage of speech acts from memos = 3.2%
sum(dat$rawtotwords[dat$memo==1])/sum(dat$rawtotwords) #percentage of words from memos = 27.5%

  # meeting attributes
mts<-dat[dat$memo==0,]
parts<-as.data.frame(ddply(mts, .(docid,admin), summarize, out=length(names)))
parts2<-parts[parts$out>1,]
mean(parts2$out) #average number of meeting participants 4.7

  # crisis-level corpus attribute
sum(keep$rawtotwords)/length(unique(keep$obs)) #average words per crisis from the core bureaucracies = 3,442

################
## Corpus Influential Speakers by Administration (SI Table A2)
admins<-unique(key$admin)
holder<-c("empty")
for(i in 1:length(admins)){
	grab<-keep[keep$admin==admins[i],]
	form1<-ddply(grab,.(names),summarize,text=sum(functotwords))
	form2<-form1[order(-form1$text),]
	form3<-form2$names[1:8]
	holder<-append(holder,as.character(form3))
}
out1<-holder[-1]
out2<-as.data.frame(matrix(out1,ncol=6,byrow=FALSE))
colnames(out2)<-c("Carter","Ford","Nixon","Eisenhower","Kennedy","Johnson")
out2 # SI Table A2 reports top 7 by administration dropping generic bureaucracy speakers (e.g., "State" used when a particular speaker/author is not identified beyond the bureaucratic affialiation)




######################################################################################################
######################################################################################################
#### IV: Internal Bureaucracy Communication
######################################################################################################
######################################################################################################

################
## Preliminary Analysis of Communication *within* Bureaucracies (SI S6) 
internal<-read.csv("InternalBureaucracy.csv")

  ## State dicusses more political (vs. military) content as compared to Defense
summary(lm(fnbstd ~ as.factor(role),internal)) #continuous outcome measure
summary(lm(fnbpolitical ~ as.factor(role),internal)) #binary outcome measure

  ## State provides more uncertainty as compared to Defense (not statistically significant)
summary(lm(uper ~ as.factor(role),internal))







